****************************************************************************************************************
**																											  **
** This do file is part of the replication material for the following article: 								  **
**  "Is There an Ideological Asymmetry in the Incumbency Effect? Evidence from U.S. Congressional Elections." **
** 		Authors: Morisi, Davide, Jost, John, Panagopoulos, Costas, and Jussi Valtonen		    			  **
** 		Journal: Social Psychological and Personality Science																	  **
**																											  **
** This file replicates the following step of the analysis: 												  **
**																											  **
**		RECODING CCES 2006 ORIGINAL DATASET			   													  	  **
**																											  **
****************************************************************************************************************

*Set working directory
*The CCES data folder can be overridden by defining the global macro cces_dir
*before running this do file; when it is not defined, the original location is used.
*Forward slashes are used in paths because Stata accepts them on every platform.
if `"$cces_dir"' == "" {
	global cces_dir "C:/Users/Davide Morisi/Dropbox/datasets/CCES"
}
cd "$cces_dir"

*Open original dataset
*Stephen Ansolabehere, 2010, "CCES Common Content, 2006", https://doi.org/10.7910/DVN/Q8HC9N, Harvard Dataverse, V4, UNF:5:Zz4+e5bz7lzeLOjQCUk+lw== [fileUNF] 
*Downloaded on October 20, 2020 from https://dataverse.harvard.edu/dataset.xhtml?persistentId=hdl:1902.1/14002
use "2006/cces_2006_common.dta", clear


*********************
*covariates
*Step 1: inspect the raw variables under their original CCES names
fre v2004 // gender
summarize v2020 // year of birth (used for age)
fre v2018 // education
fre v2005 // race
fre v2022 // religion
fre v3008 // economy
fre v1002 // state
summarize v1001 // weight

*Step 2: give the variables that are used unchanged a readable name
rename (v2004 v2020 v2018 v3008 v1002 v1001) (gender birthyr educ economy state06_orig weight1)

*Step 3: collapsed versions of race and religion (stored in new variables)
*race: categories 4 to 8 are pooled into 99=other
recode v2005 (4/8=99), generate(racer)
*religion: note: atheist & agnostic missing
recode v2022 (3/4=3) (5=6) (6/7=3), generate(religpewr)
fre religpewr

/*recode state with coding used in other datasets
recode stater 1=1	2=2	3=4	4=5	5=6	6=8	7=9	8=10	9=11	10=12	11=13	12=15 ///
13=16	14=17	15=18	16=19	17=20	18=21 19=22	20=23	21=24	22=25	23=26 ///
24=27	25=28	26=29	27=30	28=31	29=32	30=33	31=34	32=35	33=36	34=37 ///
35=38	36=39	37=40	38=41	39=42	40=44	41=45	42=46	43=47	44=48	45=49 ///
46=50	47=51	48=53	49=54	50=55	51=56, gen(state)
fre state*/

*Step 4: working copy of the state variable
clonevar state06 = state06_orig


***
*partisanship / ideology

*party of respondent - profile
fre v3007 // 7 pointPID
fre v3005 // 3pointPID
tabulate v3007 v3005
rename v3007 pid7
rename v3005 pid3 // leaning partisans coded as independent


*party id post
fre v4034 // Post Election 3 point party ID
rename v4034 pid3post
fre pid3post
fre v4036 // Party Leaning of Independents

*5 cat, including leaners
*1 dem, 2 lean dem, 3 indep, 4 lean rep, 5 rep
*Start from missing, then fill the two partisan end points and the three
*middle categories; for pid3post==3 the values 3/4/5 of v4036 map onto 2/3/4.
generate pid5post = .
replace pid5post = 1 if pid3post==1
replace pid5post = 5 if pid3post==2
replace pid5post = v4036 - 1 if pid3post==3 & inlist(v4036, 3, 4, 5)
fre pid5post

***
*ideology

*ideology PROFILE
*1 very lib, 2 lib, 3 moderate, 4 cons, 5 very cons, 6 dk
fre v2021
rename v2021 ideo5
fre ideo5

*ideology from pre-election survey
*The scale below represents the ideological spectrum from very liberal (0) to very conservative (100).
*The most centrist American is exactly at the middle (50).
fre v3042 // coded from 0 to 100
clonevar ideo_2006 = v3042

*Seven-category versions of the 0-100 scale
*1 very lib, 2 lib, 3 somewhat lib, 4 mod, 5 somewhat con, 6 con, 7 very con
*main version: value 101 set to missing
recode v3042 (0/10=1) (11/39=2) (40/49=3) (50=4) (51/60=5) (61/89=6) (90/100=7) (101=.), generate(ideo7)
*same cut points, but value 101 is placed in the moderate category (4)
recode v3042 (0/10=1) (11/39=2) (40/49=3) (50=4) (51/60=5) (61/89=6) (90/100=7) (101=4), generate(ideo7_06_dkmod)
*alternative cut points 1 (wider moderate band, 46 to 54)
recode v3042 (0/10=1) (11/35=2) (36/45=3) (46/54=4) (55/64=5) (65/89=6) (90/100=7) (101=.), generate(ideo7_06_alt1)
*alternative cut points 2 (wider extreme categories)
recode v3042 (0/17=1) (18/34=2) (35/49=3) (50=4) (51/65=5) (66/82=6) (83/100=7) (101=.), generate(ideo7_06_alt2)


************
*vote post

*voted?
*v4004 -- Did R vote in the 2006 election
fre v4004
rename v4004 turnout06
clonevar turnout = turnout06

*senate
*fre CC355 // CC355 Senate Vote Intent (In the race for U.S. Senator in your state, who do you prefer?)
fre v4014 // Senator vote - For whom did you vote for U.S. Senator?
rename v4014 votesen06
*collapsed copy: 1 dem cand, 2 rep cand, 3 other, 4 did not vote, 5 dk
generate vote_sen = votesen06
recode vote_sen (3=4) (6 7 8 9=3)
fre votesen06 vote_sen

*house
fre v4015 // For whom did you vote for U.S. House of Representatives?
rename v4015 votehou06
fre votehou06
*collapsed copy: 1 dem cand, 2 rep cand, 3 other, 4 did not vote, 5 dk
generate vote_hou = votehou06
recode vote_hou (3=4) (6 7=3)
fre vote_hou


****
*Create incumbent var - SENATE
*All variables are referred to by their full names (no abbreviations), so this
*section also runs with "set varabbrev off" and cannot silently pick up a
*different variable that happens to share a prefix.

*Party of candidates
*1=dem, 2=rep, 3=other
fre v5006 v5008
rename v5006 SenCand1Party_postr
rename v5008 SenCand2Party_postr
fre SenCand1Party_postr SenCand2Party_postr

*Candidate 1
fre v5005
encode v5005, gen(SenCand1Name_postr)
fre SenCand1Name_postr
fre SenCand1Name_postr if SenCand1Party_postr==3 // exclude Sanders (independent)
ta SenCand1Name_postr if state06_orig=="MA"
*Candidate 2
fre v5007
encode v5007, gen(SenCand2Name_postr)
fre SenCand2Name_postr
fre SenCand2Name_postr if SenCand2Party_postr==3 // exclude 14 Joseph I. Lieberman
ta SenCand2Name_postr if state06_orig=="MA"
list SenCand1Name_postr SenCand1Party_postr vote_sen in 1000/1020
list SenCand2Name_postr SenCand2Party_postr vote_sen in 1000/1020

*Create incumbent var (see excel sheet, based on https://en.wikipedia.org/wiki/2006_United_States_Senate_elections)
*The numeric codes below are the codes that encode assigned to the candidate
*names above; they are only valid for this exact set of names in the raw data.
fre SenCand1Name_postr
gen SenCand1Incumbent_postr = 0 if SenCand1Name_postr<33
replace SenCand1Incumbent_postr = 1 if inlist(SenCand1Name_postr, 9, 20, 28, 25, 32, 24, 11, 13, 16, 5, 12, 17, 10)
fre SenCand1Incumbent_postr

fre SenCand2Name_postr
gen SenCand2Incumbent_postr = 0 if SenCand2Name_postr<34
replace SenCand2Incumbent_postr = 1 if inlist(SenCand2Name_postr, 7, 3, 18, 22, 10, 25, 16, 13, 33, 30, 24, 8, 4)
fre SenCand2Incumbent_postr

ta SenCand1Incumbent_postr SenCand2Incumbent_postr // good
fre SenCand1Incumbent_postr // senate dem candidate incumbent
fre SenCand2Incumbent_postr // senate rep candidate incumbent

*random check
*no incumbent in maryland (MD) / rep inc in pennsylvania / clinton inc in ny
list SenCand1Name_postr SenCand1Incumbent_postr SenCand2Name_postr SenCand2Incumbent_postr state06_orig in 1000/1010
list SenCand1Name_postr SenCand1Incumbent_postr SenCand2Name_postr SenCand2Incumbent_postr state06_orig in 2000/2010

*states without incumbent
*connecticut, maryland, minnesota, new jersey, tennessee, vermont
fre state06_orig if SenCand1Incumbent_postr==0 & SenCand2Incumbent_postr==0 


*republican vs dem incumbent
*only races where candidate 1 is the democrat and candidate 2 the republican
ta SenCand1Incumbent_postr SenCand2Incumbent_postr if (SenCand1Party_postr==1 & SenCand2Party_postr==2)
gen inc_rep_sen = 0 if (SenCand1Party_postr==1 & SenCand2Party_postr==2) & (SenCand1Incumbent_postr==1 & SenCand2Incumbent_postr==0)
replace inc_rep_sen = 1 if (SenCand1Party_postr==1 & SenCand2Party_postr==2) & (SenCand1Incumbent_postr==0 & SenCand2Incumbent_postr==1)
fre inc_rep_sen // 0= dem inc (rep not inc), 1= rep inc (dem not inc)
*including states with no dem/rep incumbents
ta SenCand1Incumbent_postr SenCand2Incumbent_postr
gen inc_all_sen = inc_rep_sen
recode inc_all_sen 0=-1
replace inc_all_sen = 0 if (SenCand1Party_postr==1 & SenCand2Party_postr==2) & (SenCand1Incumbent_postr==0 & SenCand2Incumbent_postr==0)
ta inc_all_sen // -1=dem inc, 0=inc not dem nor rep, 1=rep inc

*keep only states with elections, not open elections
ta state06_orig inc_rep_sen, nol // no need for filter
gen filter_sen=1 if inc_rep_sen!=.



**********************
*contextual var HOUSE

*HOUSE > NOTE NO INCUMBENT INFO, RELY ON EXCEL DATA

*party of candidate
*1=dem, 2=rep, 3=other
fre v5002  // dem candidate
fre v5004 // rep candidate

*copy each raw party variable into HouseCand<k>Party_postr and attach the
*value label named partycand to the copy
forvalues k = 1/2 {
	local rawparty : word `k' of v5002 v5004
	generate HouseCand`k'Party_postr = `rawparty'
	label values HouseCand`k'Party_postr partycand
}
tabulate HouseCand1Party_postr HouseCand2Party_postr


*******************
*combined variables > vote + incumbent status

*vote + incumbent status, all options - SENATE
fre turnout vote_sen
fre inc_all_sen // -1=dem inc, 0=inc not dem nor rep, 1=rep inc
tabulate vote_sen inc_all_sen

*0 = did not vote
generate votinc_sen_all = 0 if (turnout>1 & turnout!=.) | vote_sen==4
*two-digit code: tens digit = vote (1 dem, 2 rep); units digit = incumbency
*status shifted by one (0 dem inc, 1 no dem/rep inc, 2 rep inc),
*which yields the values 10 11 12 20 21 22
forvalues party = 1/2 {
	forvalues status = -1/1 {
		replace votinc_sen_all = 10*`party' + `status' + 1 if vote_sen==`party' & inc_all_sen==`status'
	}
}
*label var votinc_sen_all "Voted for dem/rep by incumbent status - SENATE"
*label de votinc_sen_all 0"did not vote" 10"vote dem, dem inc"  11"vote dem, no inc"  12"vote dem, rep inc" ///
*20"vote rep, dem inc"  21"vote rep, no inc"  22"vote rep, rep inc" 
*label val votinc_sen_all votinc_sen_all
fre votinc_sen_all

******
*variables for merging datasets

*respondent identifier: keep the 2006-specific name and add a generic copy
rename v1000 caseid06

*survey year (constant)
generate year = 2006

clonevar caseid = caseid06

*flag set to 1 whenever turnout is not the system missing value
tabulate turnout, missing
generate tookpost = 1 if turnout!=.


****
*create congressional districts
*Builds stcd = state number (1 to 50, alphabetical by state name) followed by
*the two-digit congressional district, as a string (stcd_string) and as a number (stcd).
fre state06_orig
drop if state06_orig=="DC"
encode state06_orig, gen(staten1)
fre staten1
*encode numbers the postal abbreviations alphabetically; the recode below maps
*that order onto the alphabetical order of the full state names. The mapping is
*only valid when all 50 states are present, so stop if that is not the case.
quietly tabulate staten1
assert r(r) == 50
recode staten1 1=2 2=1 3=4 4=3 5=5 6=6 7=7 8=8 9=9 10=10 ///
	11=11 12=15 13=12 14=13 15=14 16=16 17=17 18=18 19=21 20=20 ///
	21=19 22=22 23=23 24=25 25=24 26=26 27=33 28=34 29=27 30=29 ///
	31=30 32=31 33=28 34=32 35=35 36=36 37=37 38=38 39=39 40=40 ///
	41=41 42=42 43=43 44=44 45=46 46=45 47=47 48=49 49=48 50=50, gen(staten)
*recode in correct order like other datasets
label var staten "States from 1 to 50 in alphabetical order"
fre staten // alphabetical order, from 1 to 50
ta state06_orig staten if staten<6
ta state06_orig staten if staten>44

*combine with district
fre v1003
rename v1003 cd2006
fre cd2006
fre cd2006 if staten==2
fre cd2006 if state06_orig=="CA"
fre cd2006 if state06_orig=="TX"
*probl: in single district state, district coded as 0 instead of 1
*single district states: alaska, delaware, montana, north dakota, south dakota, vermont, wyoming
fre staten if cd2006==0
gen single_district = 1 if inlist(staten, 2, 8, 26, 34, 41, 45, 50)
fre cd2006 if single_district==1
gen cd2006_correct = cd2006 if single_district!=1
replace cd2006_correct = 1 if single_district==1
fre cd2006_correct

*now turn into string
tostring cd2006_correct, gen(cd2006str)
fre cd2006str // problem value 1 instead of 01, 2 instead of 02, etc.
fre cd2006str if staten==2

*turn 1 into 01, 2 into 02 etc.
*The %02.0f format pads single-digit districts with a leading zero. A missing
*district is left empty instead of being turned into the text "0.".
gen cd2006str_correct = string(cd2006_correct, "%02.0f") if !missing(cd2006_correct)
label var cd2006str_correct "Post  Congressional district 2006 1=01"
fre cd2006str cd2006str_correct if staten==2
fre cd2006str cd2006str_correct if staten==9
fre cd2006str cd2006str_correct if staten==50

*state + district; only built when both parts are known, so that an incomplete
*identifier ends up missing rather than as a truncated number
egen stcd_string = concat(staten cd2006str_correct) if !missing(staten, cd2006_correct)

*double check > all correct
fre stcd_string if staten==9 // florida
fre stcd_string if staten==9 & cd2006!=.
fre stcd_string if staten==45 // vermont
fre stcd_string if staten==45 & cd2006!=.

*numeric version, used as the key when merging the district-level data
gen stcd = real(stcd_string)
label var stcd_string "State (1 to 50 alphabetical) + congr. district (01 to n) STRING"
label var stcd "State (1 to 50 alphabetical) + congr. district (01 to n)"

***
/*merge with contextual var

save "CCES_2006_recoded2.dta", replace
import excel "C:\Users\Davide Morisi\Dropbox\research_inprogress\incumbency_effect\Other files\hr4618.xls", sheet("2006") firstrow clear
sum year stcd inc pwin dv dvp fr po1 po2 redist dexp rexp dpres switchb switcha
rename po1 p1_orig
rename po2 p2_orig
gen po1 = real(p1_orig) // transform dv var into numeric
gen po2 = real(p2_orig) // transform dv var into numeric
sum year stcd inc pwin dv dvp fr po1 po2 redist dexp rexp dpres switchb switcha
drop year p1_orig p2_orig switchb switcha
save "C:\Users\Davide Morisi\Dropbox\research_inprogress\incumbency_effect\Replication do files\Data\house_data_2006.dta", replace

use "CCES_2006_recoded2.dta", clear
*/

*Folder with the district-level House files (house_data_2006.dta, house_data_2008.dta).
*It can be overridden by defining the global macro house_dir before running this
*do file; when it is not defined, the original location is used.
*Forward slashes are used in paths because Stata accepts them on every platform.
if `"$house_dir"' == "" {
	global house_dir "C:/Users/Davide Morisi/Dropbox/research_inprogress/incumbency_effect/Replication do files/Data"
}

*First merge: district-level data from the 2008 file; only inc, po1 and po2 are
*kept and renamed with the suffix _post so that they do not clash with the
*variables of the same name in the 2006 file merged below.
*NOTE(review): this is the 2008 file inside the 2006 recode - presumably
*intentional (values for the following election), but confirm it is not a copy-paste slip.
merge m:1 stcd using "$house_dir/house_data_2008.dta" // all matched
drop pwin dv dvp fr redist dexp rexp dpres _merge
rename inc inc_post
rename po1 po1_post
rename po2 po2_post

*Second merge: district-level data from the 2006 file (all variables kept)
merge m:1 stcd using "$house_dir/house_data_2006.dta" // all matched
drop _merge

***
*voter registration status post
*NOTE(review): the other v3xxx variables in this file are described as profile /
*pre-election items, while this one is labelled "(post)" - confirm the wave.
fre v3004
recode v3004 (2 3=0 "No/DK") (1=1 "Yes"), generate(register_post)
label variable register_post "Registered to vote? (post)"
fre register_post

***
*job approval
*Please indicate whether you approve or disapprove of the job that each of the following are doing.
* 1 Strongly approve
* 2 Somewhat approve
* 3 Somewhat disapprove
* 4 Strongly disapprove
* 5 Not sure

*Inspect the raw items: House representative first, then the senators
*(2 senators for each state)
*NOTE(review): v2013 does not follow the v30xx numbering of the other two
*approval items - check against the codebook that it is not a typo for v3013.
fre v3017
fre v2013 v3015

*Readable names for the three approval items
rename (v3017 v2013 v3015) (approval_house approval_sen1 approval_sen2)


***
*keep only selected variables
*(one thematic group per line; the text after each /// is a comment)
keep gender birthyr educ racer religpewr economy /// covariates
	state06_orig staten weight1 /// state and weight
	pid3 pid7 pid3post pid5post /// party identification
	ideo5 ideo_2006 ideo7 ideo7_06_dkmod ideo7_06_alt1 ideo7_06_alt2 /// ideology
	turnout turnout06 /// turnout
	vote_sen votesen06 vote_hou votehou06 /// vote choice
	SenCand1Party_postr SenCand2Party_postr /// senate candidates: party
	SenCand1Incumbent_postr SenCand2Incumbent_postr /// senate candidates: incumbency
	inc_rep_sen inc_all_sen filter_sen votinc_sen_all /// senate incumbency summaries
	HouseCand1Party_postr HouseCand2Party_postr /// house candidates: party
	year caseid caseid06 tookpost /// identifiers for merging
	cd2006 single_district cd2006str cd2006str_correct stcd_string stcd /// congressional district
	inc pwin dv dvp fr po1 po2 redist dexp rexp dpres /// variables merged from house_data_2006
	inc_post po1_post po2_post /// variables merged from house_data_2008
	register_post approval_house approval_sen1 approval_sen2 // registration and approval

*save recoded file
save "CCES_2006_recoded2.dta", replace









